# -*- coding: utf-8 -*-
import scrapy


class DoubanSpider(scrapy.Spider):
    """Spider that collects the short comments of one Douban movie page.

    Crawling starts at the first comments page in ``start_urls`` and keeps
    following the "next page" link until no such link is present. Each
    visited page produces one item of the form ``{'comments': [str, ...]}``.
    """

    name = 'douban'
    allowed_domains = ['douban.com']
    start_urls = ['https://movie.douban.com/subject/27605698/comments?start=0&limit=20&sort=new_score&status=P']
    # Base URL of the comments listing. ``parse`` no longer needs it (the
    # next-page link is resolved against the response URL), but the attribute
    # is kept so any code that reads it keeps working.
    film_url = 'https://movie.douban.com/subject/27605698/comments'

    def parse(self, response):
        """Extract the comments on one page and schedule the next page.

        Args:
            response: the downloaded comments page.

        Yields:
            A ``scrapy.Request`` for the next page when a "next page" link
            exists, followed by a dict ``{'comments': [...]}`` holding the
            comment texts found on this page (possibly an empty list).
        """
        # One candidate text per child <div> of the comments container;
        # divs without a "short" span give None and are filtered out.
        candidates = (
            div.xpath('.//span[@class="short"]/text()').extract_first()
            for div in response.xpath('//div[@id="comments"]/div')
        )
        comments = [text for text in candidates if text is not None]

        # Check for the next page.
        next_url = response.xpath('//a[text()="后页 >"]/@href').extract_first()
        self.logger.debug('Next page href: %s', next_url)
        if next_url is not None:
            # Resolve the href against the current page URL instead of
            # concatenating strings: this gives the same result for the
            # query-only hrefs ("?start=20&...") and stays correct if the
            # site ever emits an absolute or path-relative link.
            yield scrapy.Request(
                response.urljoin(next_url),
                callback=self.parse,
            )

        yield {'comments': comments}

